############################################################
## Fraga, Juenke, and Shah 2019: One Run Leads to Another?
## Replication Code: January 2, 2018
## Table A1: State Legislative Candidates by Race/Ethnicity
## Table A2: Distribution of Candidates by State Legislative District Demographics
############################################################

# Required Libraries
# library() stops with an error right here if dplyr is not installed;
# require() would only warn and return FALSE, deferring the failure to the
# first dplyr call further down the script.
library(dplyr)

# Load Data #
# The path is relative, so the CSV must sit in the current working directory.
# Text columns are kept as character vectors rather than factors.
sl_data <- read.csv(
  file = "FragaJuenkeShah_JOP_Data.csv",
  stringsAsFactors = FALSE
)

## Construct table of candidates by year, race, and legislative house
# Split the data by chamber code. NOTE(review): the object names suggest that
# sl_chamber == 9 is the lower house and sl_chamber == 8 the upper house;
# confirm against the data codebook.
sllow <- subset(sl_data, sl_chamber == 9)
slupp <- subset(sl_data, sl_chamber == 8)

# Tabulate candidate counts for one chamber.
#
# chamber_data: data frame with a `year` column and the seven `*_cand` count
#   columns summed below.
# Returns a data frame with one row per category (Total, White, Black, Latino,
#   Asian, Native, Other, Unknown) and one column per year, named by the year.
# Sums are taken without na.rm, so a missing count in any year shows up as NA
# in that year's column instead of being silently dropped.
count_cands_by_year <- function(chamber_data) {
  by_year <- chamber_data %>%
    group_by(year) %>%
    summarize(
      Total = sum(
        unknown_cand, asian_cand, black_cand, latino_cand,
        native_cand, other_cand, white_cand
      ),
      White = sum(white_cand),
      Black = sum(black_cand),
      Latino = sum(latino_cand),
      Asian = sum(asian_cand),
      Native = sum(native_cand),
      Other = sum(other_cand),
      Unknown = sum(unknown_cand)
    )
  by_year <- as.data.frame(by_year)

  # Drop the year column, then transpose so categories become rows and each
  # year becomes a column.
  by_category <- as.data.frame(t(by_year[, -1]))
  colnames(by_category) <- by_year$year
  by_category
}

tablA1_low <- count_cands_by_year(sllow)
tablA1_upp <- count_cands_by_year(slupp)

# Lower-chamber year columns first, then upper-chamber year columns.
tabA1 <- cbind(tablA1_low, tablA1_upp)
tabA1

# Create Table A2
# Start from a one-column data frame whose column X holds the ten bin labels
# ("\\%" is a backslash followed by a percent sign in the stored string).
frame <- data.frame(
  X = c(
    "0-10\\%", "10-20\\%", "20-30\\%", "30-40\\%", "40-50\\%",
    "50-60\\%", "60-70\\%", "70-80\\%", "80-90\\%", "90-100\\%"
  )
)

# Fill the eight count columns of Table A2. Each column holds, for one group
# and one chamber, the sum of that group's `*_cand` column over districts
# falling in each of ten bins of the group's `*_pct` column. Columns ending in
# "U" are computed from slupp, columns ending in "L" from sllow.

# Bin edges are written as literals on purpose: seq(0, 1, by = 0.1) is not
# bit-identical to these values (its 4th element differs from 0.3), which
# would move observations sitting exactly on an edge into the wrong bin.
pct_bin_edges <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# Sum a candidate-count column within bins of a share column.
#
# chamber_data: data frame containing both columns named below.
# pct_col:      name of the share column used for binning.
# cand_col:     name of the candidate-count column to sum.
# edges:        increasing bin edges; bins are [lower, upper) except the last,
#               which is closed on the right so a share equal to the final
#               edge is counted.
# Returns a numeric vector with one sum per bin. Rows whose share is NA or
# outside the edges fall in no bin; NA counts are ignored when summing.
sum_cands_by_pct_bin <- function(chamber_data, pct_col, cand_col,
                                 edges = pct_bin_edges) {
  # A misspelled column name would otherwise yield all-zero counts silently.
  stopifnot(all(c(pct_col, cand_col) %in% names(chamber_data)))

  pct <- chamber_data[[pct_col]]
  cands <- chamber_data[[cand_col]]
  n_bins <- length(edges) - 1L

  vapply(seq_len(n_bins), function(i) {
    below_upper <- if (i == n_bins) {
      pct <= edges[i + 1L]
    } else {
      pct < edges[i + 1L]
    }
    # which() discards rows where the comparison is NA, as subset() does.
    in_bin <- which(pct >= edges[i] & below_upper)
    sum(cands[in_bin], na.rm = TRUE)
  }, numeric(1))
}

frame$WhiteU <- sum_cands_by_pct_bin(slupp, "white_pct", "white_cand")
frame$WhiteL <- sum_cands_by_pct_bin(sllow, "white_pct", "white_cand")

frame$BlackU <- sum_cands_by_pct_bin(slupp, "black_pct", "black_cand")
frame$BlackL <- sum_cands_by_pct_bin(sllow, "black_pct", "black_cand")

frame$HispanicU <- sum_cands_by_pct_bin(slupp, "latino_pct", "latino_cand")
frame$HispanicL <- sum_cands_by_pct_bin(sllow, "latino_pct", "latino_cand")

frame$AsianU <- sum_cands_by_pct_bin(slupp, "asian_pct", "asian_cand")
frame$AsianL <- sum_cands_by_pct_bin(sllow, "asian_pct", "asian_cand")

# Append a totals row after the ten bin rows: columns 2-9 (the eight count
# columns) receive their column sums, while the label column X is left NA.
frame[nrow(frame) + 1, 2:9] <- colSums(frame[2:9])

# Keep the finished table under its own name and print it.
tabA2 <- frame
tabA2

# Check for consistency across Tables A1 and A2 in number of W, B, L, A candidates
# The totals row of Table A2 (row 11, count columns 2-9) should add up to the
# same number as the White, Black, Latino and Asian rows of Table A1 summed
# over every year and both chambers. Prints TRUE when the two agree.
sum(tabA2[11, 2:9]) == sum(tabA1[c("White", "Black", "Latino", "Asian"), ])